/*******************************************************************************
																				
	DESCRIPTION: This file trains the linear model.
	
*******************************************************************************/

* Start from an empty session: no data, no stored estimates
clear all

* Run identifier, used as the prefix of every output file name saved below
global id_code "116"

* For every covariate set: fit the linear model on the training part of the
* data, predict on the holdout part, and save summary statistics and
* predictions. The baseline set, labelled with a single underscore, comes
* last and is the only one estimated for all three unemployment durations.
foreach vars in _SeqDrop_incIndiv_ _SeqDrop_incOther_ _SeqDrop_emplHist_ ///
			_SeqDrop_incHist_ _SeqDrop_migHist_ _SeqDrop_indu_ _SeqDrop_mun_ _ {

	* Covariate list and unemployment durations for this label. The list
	* depends only on the label, so it is built once here instead of inside
	* the duration loop. Only the baseline set uses durations 6 and 12.
	local unemplSpan "0"
	if "`vars'" == "_SeqDrop_incIndiv_" {
		local varSet "$demoEdu"
	}
	else if "`vars'" == "_SeqDrop_incOther_" {
		local varSet "$demoEdu $incIndiv"
	}
	else if "`vars'" == "_SeqDrop_emplHist_" {
		local varSet "$demoEdu $incIndiv $incOther"
	}
	else if "`vars'" == "_SeqDrop_incHist_" {
		local varSet "$demoEdu $incIndiv $incOther $emplHist"
	}
	else if "`vars'" == "_SeqDrop_migHist_" {
		local varSet "$demoEdu $incIndiv $incOther $emplHist $incHist"
	}
	else if "`vars'" == "_SeqDrop_indu_" {
		local varSet "$demoEdu $migHist $incIndiv $incOther $emplHist $incHist"
	}
	else if "`vars'" == "_SeqDrop_mun_" {
		local varSet "$demoEdu $migHist $incIndiv $incOther $emplHist $incHist $indu"
	}
	else if "`vars'" == "_" {
		local varSet "$demoEdu $migHist $incIndiv $incOther $emplHist $incHist $indu $mun"
		local unemplSpan "0 6 12"
	}
	else {
		* Stop rather than silently reuse the covariate list of a previous label
		display as error "Unknown variable set label: `vars'"
		exit 198
	}

	foreach model in Full {

		foreach year in 2006 {

		*******************************************************************************
		 * Keep only data used for parameter tuning, training and creating weights
		*******************************************************************************

		use "${data}/002_DataForR_`model'`vars'`year'.dta", clear

		* By keeping only those that appear in DataForR, but not those in
		* PredEnsemble we keep observations that were used for parameter tuning,
		* training etc. in the ML models
		merge 1:1 n using "${data}/003_MainWithEnsemblePred_`model'`vars'`year'.dta"
		keep if _merge==1
		drop _merge

		*******************************************************************************
		 * Fit one linear model per unemployment duration and store it
		*******************************************************************************

		foreach unempl in `unemplSpan' {

			reg emplAft6M_`unempl'M_In `varSet'
			estimates store `model'`vars'`unempl'

		}

		*******************************************************************************
		 * Clean and prepare the part of data for which we make predictions
		*******************************************************************************
		use "${data}/002_DataForR_`model'`vars'`year'.dta", clear

		* By keeping only those that appear in DataForR and in PredEnsemble we
		* keep observations that were in the holdout sample:
		merge 1:1 n using "${data}/003_MainWithEnsemblePred_`model'`vars'`year'.dta"
		keep if _merge==3
		drop _merge

		gen DataYear = `year'
		compress

		*******************************************************************************
		 * Make predictions
		*******************************************************************************

		foreach unempl in `unemplSpan' {

			* Name of the prediction variable, and the calendar year in which
			* the spell reaches this duration (30 days per month)
			local pvar p_`model'`vars'`unempl'
			local spellYear "year(startU + `unempl'*30)"

			estimates restore `model'`vars'`unempl'
			predict `pvar'

			* Constrain prediction to be between 0 and 1. Missing predictions
			* are skipped explicitly: max() ignores missing arguments, so an
			* unguarded clamp would turn a missing prediction into 0.
			replace `pvar' = min(max(0, `pvar'), 1) if !missing(`pvar')

			* Keep predictions only for spells that reach this duration in the data year
			replace `pvar' = . if `year'!=`spellYear'


			*******************************************************************************
			 * Get statistics for predictions using linear model
			*******************************************************************************

			* Mean of the empirical JFRs in the year. The value is stored on
			* every row so that it is still present after the statistics
			* export below keeps only the first observation.
			summarize emplAft6M_`unempl'M_In if `year'==`spellYear', meanonly
			gen mean_e_Aft6M_`unempl'M_In_`model'`year' = r(mean)

			* R-squared of outcome variable on predictions
			reg emplAft6M_`unempl'M_In `pvar' if `year'==`spellYear'

			* Only the R-squared at duration 0 is exported
			if `unempl' == 0 {
				gen R_`model'_`year'=e(r2)
			}

		}

		* Export statistics: a single row holding the means and the R-squared
		preserve
			keep mean* R*
			drop if R_`model'_`year'==.
			keep if _n==1
			gen model = "`model'"
			gen vars = "`vars'"
			gen year = "`year'"

			save "${data}/${id_code}_Linear_`model'`vars'`year'.dta", replace
		restore

		* Export predictions:
		keep LopNr_PersonNr InLnr p_`model'`vars'*

		rename p_`model'`vars'0 p_emplAft6M_0M_In_Linear
		if "`vars'" == "_" {
			rename p_`model'`vars'6 p_emplAft6M_6M_In_Linear
			rename p_`model'`vars'12 p_emplAft6M_12M_In_Linear
		}


		save "${data}/${id_code}_Linear_Predictions_`model'`vars'`year'.dta", replace

		}
	}
}

 
* Stack the one-row statistics files of all variable sets into a single file,
* starting with the baseline set and appending the others in the order listed
foreach m in Full {

	foreach y in 2006 {

		* Leading part shared by every statistics file name
		local stem "${data}/${id_code}_Linear_`m'"

		* Baseline variable set: its label is a single underscore
		use "`stem'_`y'.dta", clear

		local dropSets _SeqDrop_mun_ _SeqDrop_indu_ _SeqDrop_incHist_ _SeqDrop_emplHist_ _SeqDrop_incOther_ _SeqDrop_incIndiv_ _SeqDrop_migHist_
		foreach v of local dropSets {
			append using "`stem'`v'`y'.dta"
		}

		save "`stem'_`y'_allVars.dta", replace

	}
}

/*******************************************************************************
		Predictions at X months done with model trained at Y months
*******************************************************************************/

* For the baseline model, we produce predictions for the JFR at X months using 
* the model trained on Y months data:
* This section relies on the estimates stored earlier in this session under
* the names built from model, vars and duration below, so it must run in the
* same session as the training loop above.
local model Full
local year 2006
local vars _

* Clean and prepare the part of data for which we make predictions
use "${data}/002_DataForR_`model'`vars'`year'.dta", clear

* By keeping only those that appear in DataForR and in PredEnsemble we keep
* observations that were in the holdout sample. Existing p_empl variables are
* dropped so that the names generated below do not clash with them.
merge 1:1 n using "${data}/003_MainWithEnsemblePred_`model'`vars'`year'.dta"
keep if _merge==3
drop _merge p_empl*

gen DataYear = `year'
compress

* Make predictions: the outer loop is the duration the prediction is for,
* the inner loop is the duration the model was trained on
foreach unempl in 0 6 12 {
	foreach unempl2 in 0 6 12 {

		local pvar p_emplAft6M_`unempl'M_In_`unempl2'M_Mod

		estimates restore `model'`vars'`unempl2'
		predict `pvar'

		* Constrain prediction to be between 0 and 1. Missing predictions are
		* skipped explicitly: max() ignores missing arguments, so an
		* unguarded clamp would turn a missing prediction into 0.
		replace `pvar' = min(max(0, `pvar'), 1) if !missing(`pvar')

		* Keep predictions only for spells that reach this duration in the
		* data year (30 days per month)
		replace `pvar' = . if `year'!=year(startU + `unempl'*30)

	}

}

* Export predictions:
keep LopNr_PersonNr InLnr empl* p_*

save "${data}/${id_code}_Linear_Predictions_`model'`vars'`year'_xMonthPred_yMonthModel.dta", replace
